Using the {vtree} package to visualize discrete multivariate data. © 2022 Nick Barrowman, Richard Webster. Image credit: Ed323 at English Wikipedia, Public domain, via Wikimedia Commons.

Load libraries

# Load vtree. When the author's local development copy exists it is sourced
# (the original behaviour); on any other machine fall back to the installed
# package so the script still runs instead of failing on a missing file.
if (file.exists("C:/Users/nickb/Documents/vtree/source.R")) {
  source("C:/Users/nickb/Documents/vtree/source.R")
} else {
  library(vtree)
}
library(dplyr)  # mutate(), filter(), case_when(), %>%
library(haven)  # read_spss(), as_factor()

Abstract

Variable trees are a new way to explore discrete multivariate data. They display nested subsets and corresponding frequencies and percentages. Manual calculation of these quantities is laborious and error-prone, especially when there are many multi-level factors and missing data. In this tutorial, you’ll learn about the {vtree} package and how to use it to quickly generate variable trees and gain new insights into your data. Using COVID-19 examples, you’ll also learn how to prune variable trees, display summary information, label variables and nodes, explore missing values, and more.

List with incremental bullets

Dataset

https://www.frontiersin.org/articles/10.3389/fcomm.2021.789272/full

Data repository (Zenodo)

https://zenodo.org/record/5779516

Read data and perform pre-processing

# Read the SPSS survey file from the current working directory.
data <- haven::read_spss("Viral_Communication_Phase_I-III.sav")

# Alternative (disabled): read the same file straight from Zenodo.
#data <- read_spss("https://zenodo.org/record/5779516/files/Viral_Communication_Phase_I-III.sav")

# Turn SPSS value-labelled columns into factors, so the recoding code can
# compare against the label text (e.g. "Strongly Agree").
d <- haven::as_factor(data)

# Debugging aid (disabled): browse the variable labels.
#View(Hmisc::label(d))
#' Collapse a seven-point agreement scale into two categories
#'
#' @param x Vector of responses (factor or character) using the labels
#'   "Strongly Agree", "Agree", "Somewhat Agree", "Neutral",
#'   "Somewhat Disagree", "Disagree" and "Strongly Disagree".
#' @param neutralAgree If `TRUE`, "Neutral" is grouped with the agreeing
#'   responses; otherwise it is grouped with the disagreeing ones.
#' @param agree Label given to agreeing responses.
#' @param disagree Label given to disagreeing responses.
#' @return A character vector the same length as `x`. Missing values and
#'   responses outside the seven labels above become `NA`.
agreement <- function(x, neutralAgree = TRUE, agree = "Agree", disagree = "Disagree") {
  # `x` is an ordinary vector argument, so no tidy-eval embracing is needed;
  # comparing on the character form works for factors and plain strings alike.
  response <- as.character(x)

  agree_set <- c("Strongly Agree", "Agree", "Somewhat Agree")
  disagree_set <- c("Somewhat Disagree", "Disagree", "Strongly Disagree")
  if (neutralAgree) {
    agree_set <- c(agree_set, "Neutral")
  } else {
    disagree_set <- c(disagree_set, "Neutral")
  }

  # %in% never yields NA, so missing or unrecognised responses simply keep
  # the NA they were initialised with.
  collapsed <- rep(NA_character_, length(response))
  collapsed[response %in% agree_set] <- agree
  collapsed[response %in% disagree_set] <- disagree
  collapsed
}
# Add the collapsed analysis variables used by the trees.
# Responses that match none of the listed labels (including NA) become NA.
d <- d %>%
  mutate(
    # Phase-completion indicators under shorter names.
    # NOTE(review): "COMPETION" is spelled as in the existing code; presumably
    # it mirrors the column name in the data file -- confirm.
    Phase_1 = M_PHASE1_COMPETION,
    Phase_2 = M_PHASE2_COMPLETION,
    Phase_3 = M_PHASE3_COMPLETION,

    # Political orientation: five-point scale collapsed to three ordered levels.
    politics_1 = factor(
      case_when(
        SD_POL_ORIENTATION %in% c("Left", "-2", "-1") ~ "Left",
        SD_POL_ORIENTATION %in% "Centre"              ~ "Centre",
        SD_POL_ORIENTATION %in% c("1", "2", "Right")  ~ "Right"
      ),
      levels = c("Left", "Centre", "Right")
    ),

    # Perceived mask effectiveness: top three ratings versus bottom two.
    mask_1 = case_when(
      PHASE1_AC_EFF_MASK %in% c(
        "Extremely effective",
        "Very effective",
        "Effective"
      ) ~ "Effective",
      PHASE1_AC_EFF_MASK %in% c(
        "Somewhat effective",
        "Not effective at all effective"
      ) ~ "Less effective"
    ),

    # "Science is making the world a better place": Neutral counts as agreement.
    world_1 = agreement(PHASE1_AS_WORLD, neutralAgree = TRUE),

    # Boring-to-exciting scale: only the positive side counts as "Exciting".
    science_eb_1 = case_when(
      PHASE1_AS_BOR_EXC %in% c("Exciting", "2", "1")          ~ "Exciting",
      PHASE1_AS_BOR_EXC %in% c("0", "-1", "-2", "Boring")     ~ "Not Exciting"
    ),

    # Covid harmlessness item: Neutral counts as "Harmless".
    harmless_1 = agreement(
      PHASE1_RA_HARMLESS_r,
      neutralAgree = TRUE,
      agree = "Harmless",
      disagree = "Not Harmless"
    ),

    # Finance and economy items: Neutral counts as disagreement.
    finance_1 = agreement(PHASE1_RA_FINANCE, neutralAgree = FALSE),
    economy_1 = agreement(PHASE1_RA_ECONOMY, neutralAgree = FALSE)
  )
# Analysis subset: rows whose Phase 1 completion flag is "Yes".
u <- filter(d, M_PHASE1_COMPETION == "Yes")

# Display labels for derived variables, passed to vtree() via `labelvar`.
lv <- c(
  science_eb_1 = "Science is",
  harmless_1   = "Covid is",
  world_1      = "Science is making the world a better place"
)

Completed study phases

# Completion patterns across the three study phases, for all participants.
vtree(
  d,
  "Phase_1 Phase_2 Phase_3",
  pattern = TRUE,
  showlegend = TRUE
)

Completed study phases

Gender by completion of Phase 1

# Phase 1 completion, then gender, with readable variable labels.
vtree(
  d,
  "Phase_1 SD_GENDER",
  labelvar = c(
    Phase_1   = "Completed Phase 1",
    SD_GENDER = "Gender"
  ),
  splitwidth = Inf
)

Gender by completion of Phase 1

One

# Single-variable tree: exciting vs. not exciting, among Phase 1 completers.
vtree(
  u,
  "science_eb_1",
  labelvar = c(science_eb_1 = "Science is")
)

Is science exciting? (participants who completed Phase 1)

Three

# Uses the raw survey columns: the "Exciting" response on the boring/exciting
# item, followed by the original mask-effectiveness ratings.
vtree(
  u,
  "PHASE1_AS_BOR_EXC=Exciting PHASE1_AC_EFF_MASK"
)

Science rated "Exciting", then perceived mask effectiveness

Four

# Same single-variable tree as before, with a different variable label.
vtree(
  u,
  "science_eb_1",
  labelvar = c(science_eb_1 = "I think science is")
)

Is science exciting? (with an alternative variable label)

Five

# Pattern view of three derived variables: science excitement, Covid
# harmlessness, and mask effectiveness (the last has no label supplied).
vtree(
  u,
  "science_eb_1 harmless_1 mask_1",
  pattern = TRUE,
  labelvar = c(
    science_eb_1 = "Science is",
    harmless_1   = "Covid is"
  )
)

Patterns of views on science, Covid harmlessness, and mask effectiveness

Political orientation

# Political orientation, with two percentage summaries per node:
# share answering "Harmless" and share answering "Effective".
vtree(
  u,
  "politics_1",
  labelvar = c(politics_1 = "Political orientation"),
  summary = c(
    "harmless_1=Harmless \nCovid is harmless: %pct%",
    "mask_1=Effective \nMasks are effective: %pct%"
  ),
  splitwidth = Inf
)

Political orientation

By gender

# Gender, then political orientation, with the same two percentage summaries
# (no variable labels are supplied here).
vtree(
  u,
  "SD_GENDER politics_1",
  summary = c(
    "harmless_1=Harmless \nCovid is harmless: %pct%",
    "mask_1=Effective \nMasks are effective: %pct%"
  ),
  splitwidth = Inf
)

By gender

Science

# Gender, then agreement that science is making the world a better place.
# `lv` provides the label for world_1; it has no entry for SD_GENDER.
vtree(
  u,
  "SD_GENDER world_1",
  labelvar = lv,
  splitwidth = Inf
)

Science

Finance

# Agreement on the finance item (Neutral counted as disagreement).
# NOTE(review): `lv` has no entry for finance_1, so no custom label applies.
vtree(
  u,
  "finance_1",
  labelvar = lv,
  splitwidth = Inf
)

Finance

Economy

# Agreement on the economy item (Neutral counted as disagreement).
# NOTE(review): `lv` has no entry for economy_1, so no custom label applies.
vtree(
  u,
  "economy_1",
  labelvar = lv,
  splitwidth = Inf
)

Economy

Finance -> Economy

# Finance item first, then the economy item nested within it.
# NOTE(review): `lv` has no entry for either variable.
vtree(
  u,
  "finance_1 economy_1",
  labelvar = lv,
  splitwidth = Inf
)

Finance then Economy